from typing import Dict

import pathway as pw
from application.db.mysql_db.info.ResourceInformationList import ResourceInformationList
import re

class NsfcFieldCleaning:
    """Cleaning step that normalises the ``info_date`` field of each record.

    ``apply`` rewrites the ``data`` column of the given table by running
    ``date_cleaning`` on every value of that column.
    """

    # Matches a 4-digit year followed by "-month-day", where month and day
    # may each be one or two digits (e.g. "2023-05-17" or "2023-5-7").
    _DATE_PATTERN = re.compile(r'\d{4}-\d{1,2}-\d{1,2}')

    def apply(self, value):
        """Return ``value`` with its ``data`` column passed through ``date_cleaning``.

        Args:
            value: Object exposing a ``data`` column and ``with_columns``
                (presumably a ``pw.Table`` -- confirm against the caller).

        Returns:
            The result of ``value.with_columns`` with ``data`` replaced by the
            cleaned records.
        """
        result = value.with_columns(
            data=pw.apply(
                self.date_cleaning,
                value.data
            )
        )
        return result

    def date_cleaning(self, data) -> Dict:
        """Extract the first ``YYYY-M-D``-shaped date from ``info_date``.

        The first substring of ``info_date`` matching
        ``\\d{4}-\\d{1,2}-\\d{1,2}`` replaces the original field value. The
        match is stored as found; month and day are NOT zero-padded.

        The record is returned unchanged when ``info_date`` is missing, is
        not a string (e.g. ``None``), or contains no matching date.

        Args:
            data: Object exposing ``as_dict()`` that returns the record as a
                dict (presumably a ``pw.Json`` value -- confirm against the
                caller).

        Returns:
            A dict holding the record, with ``info_date`` replaced by the
            extracted date when one was found.
        """
        # Shallow copy so the dict handed back by ``as_dict()`` is never
        # mutated in place.
        record = dict(data.as_dict())
        raw_date = record.get('info_date')
        # ``re.search`` raises TypeError on None / non-string values, so only
        # attempt extraction on real strings.
        if not isinstance(raw_date, str):
            return record
        match = self._DATE_PATTERN.search(raw_date)
        if match:
            record['info_date'] = match.group()
        return record
